## Setting up the basics
# Load plotting, mixed-model, table-export, and reporting libraries.
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(knitr)
library(ggsignif)
library(plotrix)
# NOTE(review): hard-coded, user-specific working directory; consider a
# relative path or here::here() so the analysis is portable across machines.
folder = "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"
setwd(folder)
knitr::opts_knit$set(root.dir = folder)

#Input Parameters (defines e.g. folderDataDec, fileParticipants, versions, ...)
source(paste0(folder,"/Input/Behav_IC/V1.R"))

#Import functions (also attaches dplyr and reshape, hence the masking notes below)
source("DescriptiveFunctions.R")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following object is masked from 'package:Matrix':
## 
##     expand

#Output type for tables. Use "html" to view the output .html file and use "latex" to export .tex tables
outputType="html"
#Stores a list of all the regressions that are run (filled by modelTableStyle via <<-)
allModels=vector("list", length=0)

#' Create the formatted table for statistical model export
#'
#' Renders a fitted model as a stargazer table and records the model in the
#' global list `allModels` (keyed by `outputName`) for later inspection.
#'
#' @param model model to be exported
#' @param title Caption shown at the top of the table
#' @param outputType "html" or "latex"
#' @param outputName The name of the output file
#'
#' @return The text of the table, invisibly (stargazer already prints it);
#'   NULL when outputType is neither "html" nor "latex".
#' @export table Exports a .tex file if outputType=="latex"
modelTableStyle = function(model,title,outputType,outputName){
  print(outputName)
  # Stores the model in the list with all the models (global side effect)
  allModels[[outputName]]<<-model
  # Fixed: 'table' was undefined when outputType matched neither branch,
  # and the documented return value was commented out.
  table = NULL
  if (outputType=="html"){
      table = stargazer(model, type=outputType, report=('vc*sp'),
                        column.labels=c("Random Intercept"), dep.var.caption = title, 
                        dep.var.labels.include = TRUE,model.names=FALSE, 
                        align=TRUE, column.sep.width = "5000pt") 
  } else if (outputType=="latex"){
          table = stargazer(model, type=outputType, report=('vc*sp'),
                            column.labels=c("Random Intercept"), dep.var.caption = title, 
                            dep.var.labels.include = TRUE,model.names=FALSE, align=TRUE, 
                            column.sep.width = "0pt", 
                            out = paste0("Output/Tables/",outputName,".tex"),table.placement="H")  
  }
  # Invisible so knitr output is unchanged while callers can capture the text.
  invisible(table)
}

#' Exports the formatted plot as pdf for report
#'
#' Prints the plot to the active device (so it shows in the knitted report)
#' and then writes it to <folder>/Output/Figures/<outputName>.pdf.
#'
#' @param plot The ggplot to be exported
#' @param outputName The name of the output file (without extension)
#'
#' @return invisible(NULL)
#' @export plot formatted plot as pdf
plotExport = function(plot,outputName){
  print(outputName)
  print(plot)
  path = "/Output/Figures/"
  print(folder)   # 'folder' is a global set at the top of the script
  pdf(file=paste0(folder,path,outputName,".pdf"))
  # Fixed: close the pdf device even if printing the plot fails, so an
  # error cannot leave a dangling open graphics device behind.
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(NULL)
}

#' Export non-regression-based table to html or latex using stargazer
#'
#' @param table The table (data frame) to be formatted
#' @param title Caption shown at the top of the table
#' @param outputType "html" or "latex"
#' @param outputName The name of the output file
#'
#' @return The formatted text of the table, invisibly; NULL when
#'   outputType is neither "html" nor "latex".
#' @export table Exports a .tex file if outputType=="latex"
exportTable = function(table,title,outputType,outputName){
  print(outputName)
  # Fixed: result was undefined when outputType matched neither branch.
  formatted = NULL
  if (outputType=="html"){
      formatted = stargazer(table, type=outputType, dep.var.caption = title, align=TRUE,
                        column.sep.width = "5000pt",summary=FALSE,rownames = FALSE)  
  } else if (outputType=="latex"){
          formatted = stargazer(table, type=outputType, dep.var.caption = title, align=TRUE,
                            column.sep.width = "0pt",
                            out = paste0("Output/Tables/",outputName,".tex"), 
                            table.placement="H",
                          summary=FALSE,rownames = FALSE)  
  }
  invisible(formatted)
}

1 Knapsack Decision Variant

#Imports Decision Data
dataAllDec = importTrialInfo(folderDataDec,"dec")

#Adds Phase Transition Dummy Variable to data (1-> in Phase Transition / 0 -> Out of Phase Transition)
#(instance types 1-4 are inside the phase transition, 5-6 outside)
dataAllDec$phaseT=as.numeric(dataAllDec$type<=4)

#Adds an experiment version column to the data based on "Participants Log.csv"
partLog0=read.csv(fileParticipants, stringsAsFactors = FALSE)
partLog=data.frame(pID=partLog0$Participant.ID, ExpVersion=partLog0$Experiment.Version,stringsAsFactors = FALSE)
dataAllDec=merge(dataAllDec,partLog, by="pID" )

#Filters to get only the relevant versions
dataAllDec=dplyr::filter(dataAllDec,ExpVersion %in% versions)

#Imports Algorithm-specific complexity measures (MZN propagations and SAT decisions)
decisionInstanceInfo= read.csv(fileDecInstances, sep=",", stringsAsFactors = FALSE)
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="propagations_MZN"]="propagations"
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="decisions_SAT"]="decisions"
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="problem"]="id"
decisionInstanceInfo=decisionInstanceInfo[,c('id','propagations','decisions')]

#Imports Algorithm-specific complexity measures (All instances with MZN(Gecode))
#decInstanceInfoAll = read.csv(fileDecInstancesAll, sep=",", stringsAsFactors = FALSE)

#Generates DataDecProp:= Decision data including expost complexity measures
dataDecProp=merge(dataAllDec,decisionInstanceInfo, by="id")

# Cleaning Decision Data:
# Filters out those trials in which an answer was not given (answer==2 codes a non-answer)
nOmitTrials = length(dataDecProp$answer[dataDecProp$answer==2])
NOmitPart= length(unique(dataDecProp$pID[dataDecProp$answer==2]))
dataDecProp=dataDecProp %>% filter(answer!=2)
print(paste(nOmitTrials,"Trials were omitted due to non-answers (from", NOmitPart,"Participants)."))
## [1] "13 Trials were omitted due to non-answers (from 8 Participants)."
#Filter additional participants.
#Filter participant with 55% accuracy?
participantsToOmit=c()#c("be16")
dataDecProp = dataDecProp %>% filter(!(pID %in% participantsToOmit))
print(paste0(length(participantsToOmit)," participants were omitted in the decision analysis."))
## [1] "0 participants were omitted in the decision analysis."
#Adds sum of weights and sum of values 
dataDecProp=addSumOfValues(dataDecProp)
dataDecProp=addSumOfWeights(dataDecProp)

names(dataDecProp)[names(dataDecProp)=="timeSpentAprox"]="timeSpent"
summaryListDec= summaryData(dataDecProp)

1.1 Descriptive Summary

# List the participants included in the decision-task analysis.
print(paste('Participants To be analysed are:',paste(sort(unique(dataDecProp$pID)),collapse=" ")))
## [1] "Participants To be analysed are: be14 be15 be16 be17 be18 be19 be20 be21 be22 be23 be24 be25 be26 be27 be28 be29 be30 be31 be32 be33"
print(paste('Number of participants to analyse :',length(unique(dataDecProp$pID)) ) )
## [1] "Number of participants to analyse : 20"
print(paste("The overall Accuracy was: ",mean(dataDecProp$correct)))
## [1] "The overall Accuracy was:  0.831114225648213"
#Summary Stats for Decision Problem
dataInput=dataDecProp

# Participant-level means first, then summary stats across participants.
accuracySummary = dataInput %>% group_by(pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(accuracySummary, digits=2, caption = "Accuracy Summary")
Accuracy Summary
mean min max SD
0.83 0.56 0.9 0.08
answerSummary = dataInput %>% group_by(pID) %>% summarise(acc=mean(answer)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(answerSummary, digits=2, caption = "Proportion of times that YES was selected as the answer")
Proportion of times that YES was selected as the answer
mean min max SD
0.48 0.31 0.6 0.06
yesNoProportions = dataInput %>% group_by(sol,pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(yesNoProportions, digits=2, caption = "Accuracy By Solution")
Accuracy By Solution
sol mean min max SD
0 0.85 0.69 0.97 0.08
1 0.81 0.36 0.89 0.12

1.1.1 Effect of Trial number on accuracy (Computational Performance)

#Trial (experience effect) effect

#Summary: accuracy by block, summarised across participants.
summaryByBlock = dataInput %>% group_by(block,pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(summaryByBlock, digits=2, caption = "Accuracy By Block Number")
Accuracy By Block Number
block mean min max SD
1 0.83 0.62 1.00 0.10
2 0.83 0.50 0.96 0.11
3 0.83 0.42 0.96 0.11
#Regression
dataInput=dataDecProp
# NOTE(review): block * trial is not a strictly increasing trial counter
# across blocks (e.g. block 2 trial 1 < block 1 trial 5) -- confirm this
# is the intended measure of overall experience.
dataInput$totalTrial = dataInput$block * dataInput$trial

# Mixed-effects logistic regression of accuracy on trial order,
# with a random intercept per participant.
logitRandomIntercept = glmer(correct ~ totalTrial+ (1|pID), family=binomial(link="logit"), data=dataInput)

#logitRandomInterceptSlope = glmer(correct ~ totalTrial + (1|pID) + (totalTrial|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName='dec01r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec01r”

Logistic regressions
correct
Random Intercept
totalTrial 0.005
(0.004)
p = 0.196
Constant 1.516***
(0.150)
p = 0.000
Observations 1,427
Log Likelihood -639.577
Akaike Inf. Crit. 1,285.153
Bayesian Inf. Crit. 1,300.943
Note: p<0.1; p<0.05; p<0.01

1.2 In and Out of Phase Transition Contrast

1.2.1 Accuracy Contrast (Computational Performance)

# Mean accuracy (and SE) per participant, then aggregated by phase-transition status.
dataInput=dataDecProp %>% group_by(phaseT,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(phaseT)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()

dataInput$phaseT = recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")

plo= ggplot(data=dataInput, aes(y=accuracy, x=as.factor(phaseT),label = round(accuracy,digits = 2))) +
  geom_bar(stat = "identity")+
  geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
  geom_signif(comparisons = list(c("Low IC","High IC")), annotations="***", y_position = 0.95, tip_length = 0.03)+
  labs(title="Accuracy In and Out of phase Transition",x="Instance complexity",y="Accuracy")+
  coord_cartesian(ylim = c(0.5,1))+
  geom_text(hjust=0.5,vjust = 5, colour="white") +
  theme_light()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))

outputName = "dec02g"
plotExport(plo,outputName)
## [1] "dec02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#Anova contrast for In/Out Phase transition (repeated measures: phaseT within pID)
anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataDecProp)
anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))

[1] “P-value for one way ANOVA: 3.58e-09”

rm(dataInput)
#Logistic Regression for In/Out Phase transition

#logistic with random effects (intercept per participant)
logitRandomIntercept = glmer(correct ~ phaseT + (1|pID), family=binomial(link="logit"), data=dataDecProp)

title="Logistic regressions"
tableName='dec02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec02r”

Logistic regressions
correct
Random Intercept
phaseT -1.327***
(0.161)
p = 0.000
Constant 2.451***
(0.167)
p = 0.000
Observations 1,427
Log Likelihood -602.372
Akaike Inf. Crit. 1,210.744
Bayesian Inf. Crit. 1,226.534
Note: p<0.1; p<0.05; p<0.01

1.2.2 Solvability and Phase Transition

1.2.2.1 Is the Phase Transition still significant regardless of whether the instance is solvable?

# Mean accuracy (and SE) per cell of solvability (sol) x phase-transition status.
dataInput=dataDecProp %>% group_by(phaseT,sol,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(sol,phaseT)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()

# Facet labels for the solvability variable; the '2' entry flags unfiltered data.
sol_names <- c(
                    `0` = "Correct answer: NO",
                    `1` = "Correct answer: YES",
                    `2` = "If this is here it means data not filtered"
                    )

# Fixed: typo in the plot title ("solvanbility" -> "solvability").
plo=ggplot(data=dataInput, aes(y=accuracy, x=as.factor(as.logical(phaseT)), label = round(accuracy,digits = 2),group=1)) +
  geom_line()+
  geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1) +
  geom_point(shape=21, size=3, fill="white")+
  labs(title="Accuracy segregated by solvability",x="In Phase Transition?",y="Accuracy")+
  theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle=0,hjust=0.5))+
  geom_text(hjust=-0.5, colour="black")+
  facet_grid(.~sol, labeller = as_labeller(sol_names))

outputName = "dec03g"
plotExport(plo,outputName)
## [1] "dec03g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#logistic with random effects (intercept); tests whether the phase-transition
#effect survives controlling for solvability (sol) and their interaction
logitRandomIntercept = glmer(correct ~ phaseT + sol + phaseT:sol + (1|pID), family=binomial(link="logit"), data=dataDecProp)

title="Logistic regressions"
tableName='dec03r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec03r”

Logistic regressions
correct
Random Intercept
phaseT -1.285***
(0.240)
p = 0.00000
sol -0.250
(0.271)
p = 0.355
phaseT:sol -0.084
(0.323)
p = 0.796
Constant 2.584***
(0.224)
p = 0.000
Observations 1,427
Log Likelihood -600.149
Akaike Inf. Crit. 1,210.299
Bayesian Inf. Crit. 1,236.615
Note: p<0.1; p<0.05; p<0.01

1.2.3 Phase Transition effect by Region (Over-Under-InPT)

1.2.3.1 Is the Phase Transition significant regardless of region (Overconstrained/Underconstrained)?

# Collapse instance types into three regions: types 1-4 are in the phase
# transition, 5 is overconstrained, 6 is underconstrained.
dataInput=dataDecProp %>% mutate(region=recode(type, `1`= 'Phase Transition', `2`= 'Phase Transition', `3`= 'Phase Transition', `4`='Phase Transition', '5'='Overconstrained', '6'='Underconstrained'))

dataInput2=dataInput %>% group_by(region,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(region)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()

# Order regions along the constrainedness axis for plotting.
dataInput2$region <- factor(dataInput2$region, levels = c('Underconstrained',
                                                          'Phase Transition',
                                                          'Overconstrained'))

plo= ggplot(data=dataInput2, aes(y=accuracy, x=region, label = round(accuracy,digits = 2))) +
  geom_bar(stat = "identity")+
  geom_signif(annotations = c("***","***"),
              y_position=c(0.95,0.95),xmin=c(1.1,2.1),xmax=c(1.9,2.9))+
  geom_signif(comparisons = list(c("Underconstrained","Overconstrained")),
               annotations="NS", y_position = 0.99, tip_length = 0.03)+
  geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
  # geom_signif(comparisons = list(c("Phase Transition","Overconstrained")),
  #             annotations="***", y_position = 0.98, tip_length = 0.05)+
  #   geom_signif(comparisons = list(c("Phase Transition","Underconstrained")),
  #             annotations="***", y_position = 0.96, tip_length = 0.05,size=0.4)+
  labs(title="Accuracy by region",x="Region",y="Accuracy")+
  coord_cartesian(ylim = c(0.5,1))+
  geom_text(hjust=0.5,vjust = 5, colour="white") +
  theme_light()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))

outputName = "decB01g"
plotExport(plo,outputName)
## [1] "decB01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#Stats test for time in and out of phase transition: ANOVA
#Dummies for the two out-of-phase-transition regions
dataInput$overConstrained= (dataInput$region=='Overconstrained')
dataInput$underConstrained= (dataInput$region=='Underconstrained')


# print("Anova between Overconstrained and Underconstrained")
# anovaModel=aov(correct~overConstrained+Error(pID/overConstrained),dataInput %>% filter(overConstrained | underConstrained))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
# 
# print("Anova between Overconstrained and In-PhaseT")
# anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataInput %>% filter(overConstrained | phaseT))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
# 
# print("Anova between Underconstrained and In-PhaseT")
# anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataInput %>% filter(underConstrained | phaseT))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
logitRandomIntercept = glmer(correct ~ overConstrained + underConstrained + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName="decB01r1"
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “decB01r1”

Logistic regressions
correct
Random Intercept
overConstrained 1.459***
(0.220)
p = 0.000
underConstrained 1.208***
(0.202)
p = 0.000
Constant 1.125***
(0.130)
p = 0.000
Observations 1,427
Log Likelihood -601.946
Akaike Inf. Crit. 1,211.891
Bayesian Inf. Crit. 1,232.944
Note: p<0.1; p<0.05; p<0.01
# Clean up the previous model object before fitting the next regression.
# Fixed: the original called rm(logit), but no object named 'logit' exists
# (the fitted models are stored as 'logitRandomIntercept'), so it only
# emitted "object 'logit' not found" warnings.
rm(logitRandomIntercept)
logitRandomIntercept = glmer(correct ~ overConstrained + phaseT + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName="decB01r2"
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “decB01r2”

Logistic regressions
correct
Random Intercept
overConstrained 0.250
(0.271)
p = 0.355
phaseT -1.208***
(0.202)
p = 0.000
Constant 2.333***
(0.206)
p = 0.000
Observations 1,427
Log Likelihood -601.946
Akaike Inf. Crit. 1,211.891
Bayesian Inf. Crit. 1,232.944
Note: p<0.1; p<0.05; p<0.01
# Fixed: rm(logit) warned because no object named 'logit' exists;
# remove the fitted model object ('logitRandomIntercept') instead.
rm(logitRandomIntercept)

1.3 MZN Propagations and accuracy (Computational Performance)

#MZN Propagations
# Mean accuracy (and SE) per instance-level propagation count.
groups4=dataDecProp  %>% group_by(propagations)
dat4=summarise(groups4, accuracy=mean(correct), se=se(correct))

plo = ggplot(data=dat4, aes(y=accuracy, x=propagations)) +
  geom_point()+
  labs(title="Accuracy and MZN",y="Accuracy",x="MZN Complexity Measure (Propagations)")+
  theme(plot.title = element_text(hjust = 0.5)) +
  #geom_smooth(method = lm)+
  #stat_smooth(method = glm, method.args = list(family=binomial(link="logit")))+
  stat_smooth(method = glm)+
  geom_hline(aes(yintercept=0.5), colour="#990000", linetype="dashed")

outputName = "dec04g"
plotExport(plo,outputName)
## [1] "dec04g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Standardise propagations so the coefficient is per standard deviation.
logitRandomIntercept = glmer(correct ~ scale(propagations) + (1|pID), family=binomial(link="logit"), data=dataDecProp)
#summary(logitRandomInterceptSlope)

title="Logistic regressions"
tableName='dec04r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec04r”

Logistic regressions
correct
Random Intercept
scale(propagations) -0.362***
(0.066)
p = 0.00000
Constant 1.689***
(0.118)
p = 0.000
Observations 1,427
Log Likelihood -626.120
Akaike Inf. Crit. 1,258.240
Bayesian Inf. Crit. 1,274.030
Note: p<0.1; p<0.05; p<0.01
# Fixed: rm(logit2) warned because no object named 'logit2' exists;
# remove the fitted model object ('logitRandomIntercept') instead.
rm(logitRandomIntercept)

1.4 SAT Decisions and Accuracy (Computational Performance)

# Mean accuracy (and SE) per SAT decision count.
groups5=dataDecProp %>% group_by(decisions)
dat5=summarise(groups5, accuracy=mean(correct), se=se(correct))

plo = ggplot(data=dat5, aes(y=accuracy, x=decisions)) +
  geom_point()+
  labs(title="Accuracy and SAT Solver",y="Accuracy",x="SAT Complexity Measure (Decisions)")+
  theme(plot.title = element_text(hjust = 0.5)) +
  #stat_smooth(method = glm, method.args = list(family=binomial(link="logit")))+
  geom_smooth(method = glm) +
  coord_cartesian(ylim = c(0.50,1.00)) 

outputName = "dec05g"
plotExport(plo,outputName)
## [1] "dec05g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Mixed-effects logistic regression of accuracy on SAT decisions.
logitRandomIntercept = glmer(correct ~ decisions + (1|pID), family=binomial(link="logit"), data=dataDecProp)

title="Logistic regressions"
tableName='dec05r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec05r”

Logistic regressions
correct
Random Intercept
decisions -0.022
(0.026)
p = 0.395
Constant 1.714***
(0.142)
p = 0.000
Observations 1,427
Log Likelihood -640.066
Akaike Inf. Crit. 1,286.131
Bayesian Inf. Crit. 1,301.921
Note: p<0.1; p<0.05; p<0.01

1.5 Number of Solutions that satisfy the Constraints and Accuracy (Computational Performance)

dataInput=dataDecProp
# Adds nSolutions:= number of item combinations satisfying the constraints.
dataInput=nSolutions(dataInput)

#Plots nSolutions (x) vs. Accuracy (y)
dataInput3 = dataInput %>% group_by(nSolutions,pID,phaseT)%>%summarise(accuracyMeans=mean(correct))%>%ungroup()%>%group_by(nSolutions,phaseT)%>%summarise(accuracy=mean(accuracyMeans),se=se(accuracyMeans))%>%ungroup()

# sol:= instance is solvable (has at least one satisfying combination)
dataInput3$sol = dataInput3$nSolutions>=1
dataInput3$phaseT = recode(dataInput3$phaseT, '0' = "Low IC", '1' = "High IC")

plo= ggplot(data=dataInput3, aes(y=accuracy, x=as.factor(nSolutions))) +
  geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
  geom_point(shape=23, size=3, fill="red")+
  labs(title="Accuracy and the Number of Solutions",
       x="Number of   Solutions",y="Accuracy")+
  coord_cartesian(ylim = c(0.5,1))+
  theme_light()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5),strip.text.x = element_blank())+
  facet_grid(phaseT~sol, scales = "free_x", space = "free_x")+
  geom_smooth(data=dataInput3, aes(y=accuracy, x=nSolutions),method=glm,se=FALSE,fullrange=TRUE,linetype = "dashed")

outputName = "decB02g"
plotExport(plo,outputName)
## [1] "decB02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#Number of solutions of solvable instances: Mean difference between In/Out of Phase T
library(pander)
dataInput2 = unique(dataInput %>% select(phaseT,id,nSolutions,sol) %>% filter(sol==1))
diffMeans = t.test(nSolutions ~ phaseT ,data=dataInput2)
pander(diffMeans)
Welch Two Sample t-test: nSolutions by phaseT (continued below)
Test statistic df P value Alternative hypothesis
8.428 26.36 5.879e-09 * * * two.sided
mean in group 0 mean in group 1
8.389 2.167
#Includes a dummy if nSolutions==0, that's variable: sol.
logitRandomIntercept = glmer(correct ~ sol + phaseT + nSolutions + phaseT:nSolutions + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName='decB02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “decB02r”

Logistic regressions
correct
Random Intercept
sol -1.427***
(0.243)
p = 0.000
phaseT -1.339***
(0.225)
p = 0.000
nSolutions 0.139***
(0.041)
p = 0.001
phaseT:nSolutions 0.448***
(0.101)
p = 0.00001
Constant 2.627***
(0.220)
p = 0.000
Observations 1,427
Log Likelihood -581.201
Akaike Inf. Crit. 1,174.402
Bayesian Inf. Crit. 1,205.982
Note: p<0.1; p<0.05; p<0.01
#This version recodes phaseT to OutPhaseT, to calculate the p-value of the numberOfSolutions beta(slope) In Phase Transition
#(flipping the dummy makes the nSolutions main effect the in-PT slope)
dataInput$OutPhaseT = 1 - dataInput$phaseT
logitRandomIntercept2 = glmer(correct ~ sol + OutPhaseT + nSolutions + OutPhaseT:nSolutions + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName='decB02r_b'
modelTableStyle(logitRandomIntercept2,title,outputType,tableName)

[1] “decB02r_b”

Logistic regressions
correct
Random Intercept
sol -1.427***
(0.243)
p = 0.000
OutPhaseT 1.339***
(0.225)
p = 0.000
nSolutions 0.586***
(0.112)
p = 0.00000
OutPhaseT:nSolutions -0.448***
(0.101)
p = 0.00001
Constant 1.289***
(0.162)
p = 0.000
Observations 1,427
Log Likelihood -581.201
Akaike Inf. Crit. 1,174.402
Bayesian Inf. Crit. 1,205.982
Note: p<0.1; p<0.05; p<0.01

1.6 Instance Sampling Figure

#This shows the instances sampled for the decision task, together with the regions from which they were sampled
dataInput = dataDecProp
# Normalised capacity and profit (relative to the instance's total weights/values).
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues

# lnVC:= log of normalised profit over normalised capacity.
dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
dataInput3 = dataInput %>% mutate(region=recode(type, `1`= 'Phase Transition', 
                                                `2`= 'Phase Transition', `3`= 'Phase Transition', 
                                                `4`='Phase Transition', '5'='Overconstrained', 
                                                '6'='Underconstrained'))

# Per-region propagation range and mean lnVC (for the crossbars below).
dataInput2 =dataInput3 %>% group_by(region) %>% summarise(minP=min(propagations),
                                                       medianP = median(propagations),
                                                       maxP =max(propagations),
                                                       mlnVC=mean(lnVC)) %>% arrange(desc(region))
# lnVC boundaries of the sampling regions (in/out of the phase transition).
phaseTransitionLevelLow=log(0.6/0.45)
phaseTransitionLevelHigh=log(0.65/0.4)
OUTphaseTransitionLevelLow=log(0.85/0.45)
OUTphaseTransitionLevelHigh=log(0.9/0.4)
OUT2phaseTransitionLevelLow=log(0.35/0.45)
OUT2phaseTransitionLevelHigh=log(0.4/0.4)

plo1 = ggplot(dataInput3,aes(x=lnVC,y=propagations,group=as.factor(region)))+
  geom_crossbar(data=dataInput2,aes(ymin = minP, ymax = maxP, x = mlnVC, y = minP),
                fill = "red",alpha=1)+
  geom_crossbar(data=dataInput2,aes(ymin = minP, ymax = c(maxP[1],medianP[2],maxP[3]), x = mlnVC, y = minP, fill=as.factor(region)),alpha=1)+
  scale_fill_manual(name  ="Region",
                     breaks=c("Overconstrained", "Phase Transition", "Underconstrained"),
                     labels=c("Overconstrained", "Phase Transition", "Underconstrained"),
                     values=c("#FBB040","green","#FBB040"))+
  geom_point(aes(shape=as.factor(sol)))+
  scale_shape_manual(name  ="Solution",
                        breaks=c("0", "1"),
                        labels=c("NO", "YES"),
                        values=c(6,4))+
  theme_light()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))

plotExport(plo1,"decSampling")
## [1] "decSampling"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2

1.7 Accuracy (Comp. Perf.) and continuous approximation to Instance Complexity (Symmetric)

#Plots ln (Normalised Profit / Normalised capacity) against accuracy for the decision task
dataInput = dataDecProp
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues

dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
# Centre of the phase transition on the lnVC scale.
p_IC = 0.4

# Absolute distance from the phase-transition centre (symmetric on both sides).
dataInput$d_lnVC=abs(dataInput$lnVC-p_IC)

# Continuous instance-complexity proxy: negated Weibull CDF of the distance,
# so instances closer to the centre get a higher (less negative) score.
dataInput$IC_cont = -pweibull(dataInput$d_lnVC, shape=2, scale=0.2) 

plo= ggplot(dataInput,aes(x=d_lnVC,y=IC_cont))+
  geom_point()

outputName='dec06g'
plotExport(plo,outputName)
## [1] "dec06g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ IC_cont + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName='dec06r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec06r”

Logistic regressions
correct
Random Intercept
IC_cont -1.463***
(0.176)
p = 0.000
Constant 1.057***
(0.133)
p = 0.000
Observations 1,427
Log Likelihood -601.498
Akaike Inf. Crit. 1,208.997
Bayesian Inf. Crit. 1,224.787
Note: p<0.1; p<0.05; p<0.01

1.8 Accuracy and continuous approximation to Instance Complexity (Non-Symmetric)

#Plots ln (Normalised Profit / Normalised capacity) against accuracy for the decision task
dataInput = dataDecProp
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues

dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
# Centre of the phase transition on the lnVC scale.
p_IC = 0.4

# Signed distance: negative = underconstrained side, positive = overconstrained side.
dataInput$d_lnVC=dataInput$lnVC-p_IC

# Weibull parameters for the right (>=0) and left (<0) branches of the proxy.
shape_r=2
shape_l=2
scale_r=0.2
scale_l=0.2

dataInput$IC_cont[dataInput$d_lnVC>=0] = -pweibull(dataInput$d_lnVC[dataInput$d_lnVC>=0], shape=shape_r, scale=scale_r)+1 

# min_l:= floor of the complexity proxy on the left (underconstrained) side,
# making the proxy asymmetric around the phase transition.
min_l=0.25
dataInput$IC_cont[dataInput$d_lnVC<0] = (-pweibull(-dataInput$d_lnVC[dataInput$d_lnVC<0], shape=shape_l, scale=scale_l)+1)*(1-min_l)+min_l
#-pweibull(-dataInput$d_lnVC[dataInput$d_lnVC<0], shape=shape_l, scale=scale_l)+1

# lnVC boundaries of the sampling regions (for the shaded rectangles below).
phaseTransitionLevelLow=log(0.6/0.45)
phaseTransitionLevelHigh=log(0.65/0.4)
OUTphaseTransitionLevelLow=log(0.85/0.45)
OUTphaseTransitionLevelHigh=log(0.9/0.4)
OUT2phaseTransitionLevelLow=log(0.35/0.45)
OUT2phaseTransitionLevelHigh=log(0.4/0.4)

plo = ggplot(dataInput,aes(x=lnVC,y=IC_cont))+
  annotate("rect", xmin=phaseTransitionLevelLow,xmax=phaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="red")+
  annotate("rect", xmin=OUTphaseTransitionLevelLow,xmax=OUTphaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="green")+
  annotate("rect", xmin=OUT2phaseTransitionLevelLow,xmax=OUT2phaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="green")+
  geom_point()+
  theme_light()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

outputName='dec06gB'
plotExport(plo,outputName)
## [1] "dec06gB"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ IC_cont + (1|pID), family=binomial(link="logit"), data=dataInput)

title="Logistic regressions"
tableName='dec06rB'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] “dec06rB”

Logistic regressions
correct
Random Intercept
IC_cont -1.701***
(0.203)
p = 0.000
Constant 2.758***
(0.193)
p = 0.000
Observations 1,427
Log Likelihood -600.438
Akaike Inf. Crit. 1,206.876
Bayesian Inf. Crit. 1,222.666
Note: p<0.1; p<0.05; p<0.01

2 Knapsack Optimisation Variant

# Imports Optimisation Task trial data.
dataAllOpt = importTrialInfo(folderDataOpt,"opt")

#Adds an experiment version column to the data based on "Participants Log.csv"
dataAllOpt=merge(dataAllOpt,partLog, by="pID" )

#Filters to get only the relevant versions
dataAllOpt=dplyr::filter(dataAllOpt,ExpVersion %in% versions)

#Imports Data Clicks Data
dataAllClicks = importClicksInfo(folderDataOpt)
dataAllClicks=merge(dataAllClicks,partLog, by="pID")
dataAllClicks=dplyr::filter(dataAllClicks,ExpVersion %in% versions)

#Adds Phase Transition Dummy Variable to data (1-> in Phase Transition / 0 -> Out of Phase Transition)
dataAllOpt$phaseT=as.numeric(dataAllOpt$type<=4)

#Imports Algorithm-specific complexity measures (MZN propagations, SAT decisions and Sahni-k)
optInstanceInfo= read.csv(fileOptInstances, sep=",", stringsAsFactors = FALSE)
names(optInstanceInfo)[names(optInstanceInfo)=="propagations_MZN"]="propagations"
names(optInstanceInfo)[names(optInstanceInfo)=="decisions_SAT"]="decisions"
names(optInstanceInfo)[names(optInstanceInfo)=="problem"]="id"
optInstanceInfo=optInstanceInfo[,c('id','propagations','decisions','sahniK')]

#Generates DataOptProp:= Optimisation Task data including expost complexity measures
dataOptProp=merge(dataAllOpt,optInstanceInfo, by="id")

#Adds A column with the number of clicks away from optimum for each trial
dataOptProp=addItemDistanceFromOpt(dataOptProp)
#Adds sum of weights and sum of values 
dataOptProp=addSumOfValues(dataOptProp)
dataOptProp=addSumOfWeights(dataOptProp)

# Cleaning Optimisation Task Data:
# Filters out the cases in which participants spent less than 1 second in the task
dataOptProp1 = dataOptProp %>% filter(dataOptProp$timeSpent>=1)
numberOfDeletedTrials=dim(dataOptProp)[1]-dim(dataOptProp1)[1]

NOmitPartOpt= length(unique(dataOptProp %>% filter(dataOptProp$timeSpent<1) %>% pull(pID)))

print(paste("Number of Trials Deleted because participants spent less than 1 second in the task:",
            numberOfDeletedTrials,"(from",NOmitPartOpt,"Participants)."))
## [1] "Number of Trials Deleted because participants spent less than 1 second in the task: 2 (from 2 Participants)."
dataOptProp = dataOptProp1
rm(dataOptProp1)

summaryListOpt= summaryData(dataOptProp)

# Omit those participants that never submitted their answer from the timeSpent analysis
#omitPID = c("be19","be31")
minTimeSpent = dataOptProp %>% group_by(pID) %>% summarise(minTime = min(timeSpent)) 
omitPID = minTimeSpent$pID[abs(optTaskMaxTime-minTimeSpent$minTime)<0.1] #Select those participants whose min time spent was optTaskMaxTime
dataOptTime=dataOptProp[!(dataOptProp$pID %in% omitPID),]
print(paste0("For time analysis (*effort*) ", length(omitPID)," participants that never submitted their answers were omited."))
## [1] "For time analysis (*effort*) 3 participants that never submitted their answers were omited."

2.1 Descriptive Summary

# Participants entering the analysis and the overall accuracy.
print(paste('Participants To be analysed are:',paste(unique(dataOptProp$pID),collapse=" ")))
## [1] "Participants To be analysed are: be30 be31 be32 be19 be15 be24 be26 be16 be25 be18 be33 be14 be29 be28 be17 be27 be23 be21 be22 be20"
print(paste('Number of participants to analyse :',length(unique(dataOptProp$pID)) ) )
## [1] "Number of participants to analyse : 20"
print(paste("The overall Accuracy was: ",mean(dataOptProp$correct)))
## [1] "The overall Accuracy was:  0.832402234636871"
# Summary stats for the optimisation problem; dataInput2 is the
# time-analysis subset (never-submitters excluded).
dataInput <- dataOptProp
dataInput2 <- dataOptTime

# Per-participant accuracy, then its distribution across participants.
perPartAcc <- dataInput %>% group_by(pID) %>% summarise(acc = mean(correct))
accuracySummary <- perPartAcc %>%
  summarise(mean = mean(acc), min = min(acc), max = max(acc), SD = sd(acc))
kable(accuracySummary, digits = 2, caption = "Accuracy Summary")
Accuracy Summary
mean min max SD
0.83 0.67 0.94 0.08
# Per-participant mean time spent, then its distribution across participants
# (computed on the time-analysis subset, dataInput2).
timeSummary <- dataInput2 %>%
  group_by(pID) %>%
  summarise(acc = mean(timeSpent)) %>%
  summarise(mean = mean(acc), min = min(acc), max = max(acc), SD = sd(acc))
kable(timeSummary, digits = 2, caption = "Time spent Summary")
Time spent Summary
mean min max SD
42.51 27.38 58.66 8.09

2.1.1 Effect of Trial number

2.1.1.1 Trial Number and Accuracy

#Trial (experience effect) effect on accuracy
dataInput=dataOptProp
# NOTE(review): block * trial is multiplicative, so it is NOT a running trial
# index: e.g. block 1 / trial 6 and block 2 / trial 3 both give 6, and values
# repeat and are non-monotonic in time. A cumulative index would be
# (block - 1) * trialsPerBlock + trial. Confirm which is intended before
# interpreting the "experience" coefficient in the regression below.
dataInput$totalTrial = dataInput$block * dataInput$trial

# Per-participant accuracy within each block, then its across-participant
# distribution per block.
summaryByBlock = dataInput %>% group_by(block,pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(summaryByBlock, digits=2, caption="Accuracy By Block")
Accuracy By Block
block mean min max SD
1 0.79 0.44 1 0.14
2 0.88 0.67 1 0.11
# Experience effect on accuracy: logistic mixed model with a random
# intercept per participant.
logitRandomIntercept <- glmer(correct ~ totalTrial + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataInput)

title <- "Logistic regressions"
tableName <- 'opt01r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt01r”

Logistic regressions
correct
Random Intercept
totalTrial 0.012
(0.030)
p = 0.683
Constant 1.512***
(0.261)
p = 0.000
Observations 358
Log Likelihood -161.752
Akaike Inf. Crit. 329.504
Bayesian Inf. Crit. 341.146
Note: p<0.1; p<0.05; p<0.01

2.1.1.2 Trial Number and Time Spent

#Trial (experience effect) effect on timeSpent
dataInput=dataOptTime
# NOTE(review): same caveat as in the accuracy analysis — block * trial is not
# a running trial index (values repeat across blocks and are non-monotonic in
# time); a cumulative index would be (block - 1) * trialsPerBlock + trial.
dataInput$totalTrial = dataInput$block * dataInput$trial

# Per-participant mean time within each block, then its across-participant
# distribution per block.
summaryByBlock = dataInput %>% group_by(block,pID) %>% summarise(time=mean(timeSpent)) %>% summarise(mean=mean(time),min=min(time),max=max(time),SD=sd(time))
kable(summaryByBlock,digits=2 , caption="Time Spent per Trial segregated By Block")
Time Spent per Trial segregated By Block
block mean min max SD
1 42.81 31.11 57.33 7.60
2 42.22 23.65 60.00 9.07
# Experience effect on effort: linear mixed model of time spent on trial
# index, random intercept per participant.
linearRandomIntercept <- lmer(timeSpent ~ totalTrial + (1|pID), data = dataInput)

title <- "Linear regressions"
tableName <- 'opt02r'
modelTableStyle(linearRandomIntercept, title, outputType, tableName)

[1] “opt02r”

Linear regressions
timeSpent
Random Intercept
totalTrial 0.095
(0.135)
p = 0.483
Constant 41.802***
(2.210)
p = 0.000
Observations 305
Log Likelihood -1,188.894
Akaike Inf. Crit. 2,385.788
Bayesian Inf. Crit. 2,400.670
Note: p<0.1; p<0.05; p<0.01

2.2 In and Out of Phase Transition Contrast

2.2.1 Phase Transition and Accuracy (Computational Performance)

# Per-participant mean accuracy in/out of the phase transition, then the
# across-participant mean and standard error for each condition.
dataInput <- dataOptProp %>%
  group_by(phaseT, pID) %>%
  summarise(accuracy1 = mean(correct)) %>%
  ungroup() %>%
  group_by(phaseT) %>%
  summarise(accuracy = mean(accuracy1), se = se(accuracy1)) %>%
  ungroup()

dataInput$phaseT <- recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")

# Bar plot of computational performance by instance complexity.
plo <- ggplot(data = dataInput,
              aes(y = accuracy, x = as.factor(phaseT),
                  label = round(accuracy, digits = 2))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = accuracy - se, ymax = accuracy + se), width = .1) +
  geom_signif(comparisons = list(c("Low IC", "High IC")), annotations = "***",
              y_position = 1.01, tip_length = 0.03) +
  labs(title = "Accuracy and Instance Complexity (Optimisation)",
       x = "Instance complexity", y = "Computational performance") +
  coord_cartesian(ylim = c(0.5, 1.03)) +
  geom_text(hjust = 0.5, vjust = 5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5))

outputName <- "opt03g"
plotExport(plo, outputName)
## [1] "opt03g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Repeated-measures one-way ANOVA: accuracy on phase transition.
anovaModel <- aov(correct ~ phaseT + Error(pID/phaseT), dataOptProp)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))
## [1] "P-value for one way ANOVA: 2.21e-06"
rm(dataInput)
# Logistic mixed model with a participant random intercept.
logitRandomIntercept <- glmer(correct ~ phaseT + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)

title <- "Logistic regressions"
tableName <- 'opt03r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt03r”

Logistic regressions
correct
Random Intercept
phaseT -2.175***
(0.531)
p = 0.00005
Constant 3.359***
(0.509)
p = 0.000
Observations 358
Log Likelihood -147.622
Akaike Inf. Crit. 301.245
Bayesian Inf. Crit. 312.887
Note: p<0.1; p<0.05; p<0.01

2.2.2 Time Spent (Effort) In/Out Phase Transition

# Per-participant mean time in/out of the phase transition, then the
# across-participant mean and standard error for each condition.
dataInput <- dataOptTime %>%
  group_by(phaseT, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(phaseT) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()

dataInput$phaseT <- recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")

# Bar plot of effort (time spent) by instance complexity.
plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(phaseT),
                  label = round(time, digits = 1))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_signif(comparisons = list(c("Low IC", "High IC")), annotations = "***",
              y_position = 50, tip_length = 0.03) +
  labs(title = "Time spent on an instance and Instance Complexity",
       x = "Instance complexity", y = "Average time spent on an instance") +
  geom_text(hjust = 0.5, vjust = 5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 52))

outputName <- "opt04g"
plotExport(plo, outputName)
## [1] "opt04g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#Stats test for time in and out of phase transition: ANOVA
anovaModel <- aov(timeSpent ~ phaseT + Error(pID/phaseT), dataOptTime)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))
## [1] "P-value for one way ANOVA: 3.39e-07"
rm(dataInput)
# Linear mixed model with a participant random intercept.
# (The original comment said "logistic", but this fits lmer, i.e. linear.)
linearMRandomIntercept <- lmer(timeSpent ~ phaseT + (1|pID), data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt04r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt04r”

Linear regressions
timeSpent
Random Intercept
phaseT 10.114***
(1.236)
p = 0.000
Constant 35.749***
(2.132)
p = 0.000
Observations 305
Log Likelihood -1,156.717
Akaike Inf. Crit. 2,321.433
Bayesian Inf. Crit. 2,336.314
Note: p<0.1; p<0.05; p<0.01

2.2.3 Effort and Phase Transition interaction effect on Accuracy

2.2.3.0.1 Is the effect of effort (time spent) on accuracy (Computational Performance) modulated by Instance Complexity (In/Out of Phase Transition)?
# Interaction model: does instance complexity (phaseT) modulate the effect of
# (standardised) time spent on the odds of reaching the optimum?
logitRandomIntercept <- glmer(
  correct ~ scale(timeSpent) + phaseT + phaseT:scale(timeSpent) + (1|pID),
  family = binomial(link = "logit"), data = dataOptTime)

title <- "Logistic regressions"
tableName <- 'opt05r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt05r”

Logistic regressions
correct
Random Intercept
scale(timeSpent) -0.089
(0.735)
p = 0.905
phaseT -2.420***
(0.817)
p = 0.004
scale(timeSpent):phaseT -0.709
(0.760)
p = 0.352
Constant 3.914***
(0.796)
p = 0.00000
Observations 305
Log Likelihood -115.185
Akaike Inf. Crit. 240.369
Bayesian Inf. Crit. 258.971
Note: p<0.1; p<0.05; p<0.01

2.3 MZN Propagations

2.3.1 MZN Propagations and Accuracy (Computational Performance)

# Mean accuracy (and SE) at each MZN propagation count.
groups4 <- dataOptProp %>% group_by(propagations)
dat4 <- summarise(groups4, accuracy = mean(correct), se = se(correct))

# Scatter of accuracy against propagations, with a fitted trend and a
# chance-level reference line at 0.5.
plo <- ggplot(data = dat4, aes(y = accuracy, x = propagations)) +
  geom_point() +
  labs(title = "Accuracy and MZN", y = "Accuracy",
       x = "MZN Complexity Measure (Propagations)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm) +
  geom_hline(aes(yintercept = 0.5), colour = "#990000", linetype = "dashed")

outputName <- "opt06g"
plotExport(plo, outputName)
## [1] "opt06g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Logistic mixed model: accuracy on propagations, participant random intercept.
logitRandomIntercept <- glmer(correct ~ propagations + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)
#summary(logitRandomInterceptSlope)

title <- "Logistic regressions"
tableName <- 'opt06r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt06r”

Logistic regressions
correct
Random Intercept
propagations -0.108***
(0.022)
p = 0.00001
Constant 4.230***
(0.593)
p = 0.000
Observations 358
Log Likelihood -149.815
Akaike Inf. Crit. 305.630
Bayesian Inf. Crit. 317.272
Note: p<0.1; p<0.05; p<0.01

2.3.2 MZN Propagations and Time Spent

# Mean time spent (and SE) at each MZN propagation count.
groups5 <- dataOptTime %>% group_by(propagations)
# BUG FIX: the standard error was computed on `propagations` — the grouping
# variable, which is constant within each group — instead of on `timeSpent`.
dat5 <- summarise(groups5, time = mean(timeSpent), se = se(timeSpent))

plo <- ggplot(data = dat5, aes(y = time, x = propagations)) +
  geom_point() +
  labs(title = "Time Spent and MZN Propagations",
       y = "Time spent on an instance",
       x = "MZN Complexity Measure (Propagations)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm) +
  # NOTE(review): this y = 0.5 reference line looks copied from the accuracy
  # plots; it is unlikely to be meaningful on a time axis — confirm.
  geom_hline(aes(yintercept = 0.5), colour = "#990000", linetype = "dashed")

outputName <- "opt06g2"
plotExport(plo, outputName)
## [1] "opt06g2"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Linear mixed model: time spent on propagations, participant random intercept.
linearMRandomIntercept <- lmer(timeSpent ~ propagations + (1|pID), data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt06r2'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt06r2”

Linear regressions
timeSpent
Random Intercept
propagations 0.438***
(0.100)
p = 0.00002
Constant 32.505***
(3.013)
p = 0.000
Observations 305
Log Likelihood -1,180.119
Akaike Inf. Crit. 2,368.238
Bayesian Inf. Crit. 2,383.119
Note: p<0.1; p<0.05; p<0.01

2.4 SAT Decisions

2.4.1 SAT and Accuracy (Computational Performance)

# Mean accuracy (and SE) at each SAT decision count.
groups5 <- dataOptProp %>% group_by(decisions)
dat5 <- summarise(groups5, accuracy = mean(correct), se = se(correct))

# Scatter of accuracy against SAT decisions, with a fitted trend and a
# chance-level reference line at 0.5.
plo <- ggplot(data = dat5, aes(y = accuracy, x = decisions)) +
  geom_point() +
  labs(title = "Accuracy and SAT", y = "Accuracy",
       x = "SAT Complexity Measure (Decisions)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm) +
  geom_hline(aes(yintercept = 0.5), colour = "#990000", linetype = "dashed")

outputName <- "opt07g"
plotExport(plo, outputName)
## [1] "opt07g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Logistic mixed model: accuracy on SAT decisions, participant random intercept.
logitRandomIntercept <- glmer(correct ~ decisions + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)

title <- "Logistic regressions"
tableName <- 'opt07r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt07r”

Logistic regressions
correct
Random Intercept
decisions -0.026
(0.018)
p = 0.157
Constant 2.124***
(0.404)
p = 0.00000
Observations 358
Log Likelihood -160.808
Akaike Inf. Crit. 327.615
Bayesian Inf. Crit. 339.257
Note: p<0.1; p<0.05; p<0.01

2.4.2 SAT Decisions and Time Spent

# Mean time spent (and SE) at each SAT decision count.
groups5 <- dataOptTime %>% group_by(decisions)
# BUG FIX: the standard error was computed on `decisions` — the grouping
# variable, which is constant within each group — instead of on `timeSpent`.
dat5 <- summarise(groups5, time = mean(timeSpent), se = se(timeSpent))

plo <- ggplot(data = dat5, aes(y = time, x = decisions)) +
  geom_point() +
  labs(title = "Time Spent and SAT decisions",
       y = "Time spent on an instance",
       x = "SAT Complexity Measure (Decisions)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm) +
  # NOTE(review): this y = 0.5 reference line looks copied from the accuracy
  # plots; it is unlikely to be meaningful on a time axis — confirm.
  geom_hline(aes(yintercept = 0.5), colour = "#990000", linetype = "dashed")

outputName <- "opt08g"
plotExport(plo, outputName)
## [1] "opt08g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Linear mixed model: time spent on SAT decisions, participant random intercept.
linearMRandomIntercept <- lmer(timeSpent ~ decisions + (1|pID), data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt08r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt08r”

Linear regressions
timeSpent
Random Intercept
decisions 0.276***
(0.080)
p = 0.001
Constant 37.165***
(2.503)
p = 0.000
Observations 305
Log Likelihood -1,183.824
Akaike Inf. Crit. 2,375.647
Bayesian Inf. Crit. 2,390.529
Note: p<0.1; p<0.05; p<0.01

2.5 Sahni-K Analysis

2.5.1 Sahni-K and Accuracy (Computational Performance)

# Per-participant mean accuracy by Sahni-k, then across-participant mean ± SE.
dataInput <- dataOptProp %>%
  group_by(sahniK, pID) %>%
  summarise(accuracy1 = mean(correct)) %>%
  ungroup() %>%
  group_by(sahniK) %>%
  summarise(accuracy = mean(accuracy1), se = se(accuracy1)) %>%
  ungroup()

# Bar plot with pairwise significance brackets and a chance-level line.
plo <- ggplot(data = dataInput,
              aes(y = accuracy, x = as.factor(sahniK),
                  label = round(accuracy, digits = 2))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = accuracy - se, ymax = accuracy + se), width = .1) +
  geom_signif(comparisons = list(c("0", "1")), annotations = "***",
              y_position = 1, tip_length = 0.03) +
  geom_signif(comparisons = list(c("1", "2")), annotations = "*",
              y_position = 0.8, tip_length = 0.03) +
  labs(title = "Computational Performance and sahni-K complexity",
       x = "Sahni-K Complexity", y = "Computational Performance") +
  coord_cartesian(ylim = c(0.3, 1.03)) +
  geom_text(hjust = 0.5, vjust = 4.5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  geom_hline(aes(yintercept = 0.5), linetype = "dotted")

outputName <- "opt09g"
plotExport(plo, outputName)
## [1] "opt09g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
#Sahni-K boxplot: I think it says quite a bit
# dataInput=dataOptProp %>% group_by(sahniK,pID)%>%summarise(accuracy=mean(correct))%>%ungroup()
# 
# ggplot(dataInput, aes(x=as.factor(sahniK), y=accuracy))+
#   geom_boxplot(outlier.shape=1)+
#   #geom_boxplot( aes(x=as.logical(phaseT), y=accuracy),outlier.shape=1) +
#   stat_summary(fun.y=mean, colour="darkred", geom="point", size=2, shape=17) +
#   stat_summary(fun.y=mean, colour="darkred", geom="text", size=3, aes( label=round(..y.., digits=2)),hjust=1.5,vjust=0.5) +
#   #coord_cartesian(ylim = c(0,1)) +
#   labs(title="Accuracy and Sahni-K",x="Sahni-K",y="Percentage")+
#   theme(plot.title = element_text(hjust = 0.5))+#, axis.text.x = element_text(angle=45,hjust=1))+
#   geom_hline(aes(yintercept=0.5), colour="#990000", linetype="dashed")
# Logistic mixed model: accuracy on Sahni-k (entered as numeric),
# participant random intercept.
logitRandomIntercept <- glmer(correct ~ sahniK + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)

title <- "Logistic regressions"
tableName <- 'opt09r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt09r”

Logistic regressions
correct
Random Intercept
sahniK -1.333***
(0.193)
p = 0.000
Constant 2.310***
(0.219)
p = 0.000
Observations 358
Log Likelihood -135.816
Akaike Inf. Crit. 277.633
Bayesian Inf. Crit. 289.274
Note: p<0.1; p<0.05; p<0.01
# Dummy-coded corroboration of the pairwise contrasts shown in the
# accuracy / Sahni-k plot.
dataInput <- dataOptProp
dataInput$sahniK0 <- (dataInput$sahniK == 0)
dataInput$sahniK1 <- (dataInput$sahniK == 1)
dataInput$sahniK2 <- (dataInput$sahniK == 2)

#logitRandomIntercept0 = glmer(correct ~ sahniK1 + sahniK2 + (1|pID), family=binomial(link="logit"), data=dataInput)

# Baseline category is sahniK == 1 (dummies for 0 and 2 included).
logitRandomIntercept1 <- glmer(correct ~ sahniK0 + sahniK2 + (1|pID),
                               family = binomial(link = "logit"),
                               data = dataInput)

title <- "Logistic regressions"
tableName <- 'opt09r1'
modelTableStyle(logitRandomIntercept1, title, outputType, tableName)

[1] “opt09r1”

Logistic regressions
correct
Random Intercept
sahniK0 1.751***
(0.398)
p = 0.00002
sahniK2 -0.828*
(0.463)
p = 0.074
Constant 0.625*
(0.337)
p = 0.064
Observations 358
Log Likelihood -135.116
Akaike Inf. Crit. 278.233
Bayesian Inf. Crit. 293.755
Note: p<0.1; p<0.05; p<0.01

2.5.2 Sahni-K and Time Spent

# Per-participant mean time by Sahni-k, then across-participant mean ± SE.
dataInput <- dataOptTime %>%
  group_by(sahniK, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(sahniK) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()

# Bar plot with pairwise significance brackets for the three Sahni-k levels.
plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(sahniK),
                  label = round(time, digits = 1))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_signif(comparisons = list(c("1", "2")), annotations = "**",
              y_position = 52, tip_length = 0.03) +
  geom_signif(comparisons = list(c("0", "2")), annotations = "***",
              y_position = 56, tip_length = 0.03) +
  geom_signif(comparisons = list(c("0", "1")), annotations = "NS",
              y_position = 48, tip_length = 0.03) +
  labs(title = "Time spent on an instance and sahni-K complexity",
       x = "Sahni-K Complexity", y = "Average time spent on an instance") +
  coord_cartesian(ylim = c(0, 57)) +
  geom_text(hjust = 0.5, vjust = 5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5))

outputName <- "opt10g"
plotExport(plo, outputName)
## [1] "opt10g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Linear mixed model: time spent on Sahni-k, participant random intercept.
linearMRandomIntercept <- lmer(timeSpent ~ sahniK + (1|pID), data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt10r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt10r”

Linear regressions
timeSpent
Random Intercept
sahniK 3.125***
(0.950)
p = 0.001
Constant 41.469***
(1.990)
p = 0.000
Observations 305
Log Likelihood -1,181.856
Akaike Inf. Crit. 2,371.712
Bayesian Inf. Crit. 2,386.593
Note: p<0.1; p<0.05; p<0.01
# Dummy-coded corroboration of the pairwise Sahni-k contrasts for time spent.
# NOTE(review): this block uses dataOptProp although every other time-spent
# analysis uses dataOptTime (never-submitters excluded there) — confirm which
# data set is intended (358 observations here vs 305 elsewhere).
dataInput <- dataOptProp
dataInput$sahniK0 <- (dataInput$sahniK == 0)
dataInput$sahniK1 <- (dataInput$sahniK == 1)
dataInput$sahniK2 <- (dataInput$sahniK == 2)

# Baseline category is sahniK == 0.
linearRandomIntercept0 <- lmer(timeSpent ~ sahniK1 + sahniK2 + (1|pID), data = dataInput)

# Baseline category is sahniK == 1.
linearRandomIntercept1 <- lmer(timeSpent ~ sahniK0 + sahniK2 + (1|pID), data = dataInput)

title <- "Linear regressions"
tableName <- 'opt09r2a'
# BUG FIX: the same model was passed twice — list(m0, m0) — which produced a
# table with two identical columns (visible in the rendered output). Report
# the model once.
modelTableStyle(linearRandomIntercept0, title, outputType, tableName)

[1] “opt09r2a”

Linear regressions
timeSpent
Random Intercept
(1) (2)
sahniK1 0.928 0.928
(1.732) (1.732)
p = 0.593 p = 0.593
sahniK2 6.011*** 6.011***
(1.732) (1.732)
p = 0.001 p = 0.001
Constant 44.360*** 44.360***
(2.212) (2.212)
p = 0.000 p = 0.000
Observations 358 358
Log Likelihood -1,362.686 -1,362.686
Akaike Inf. Crit. 2,735.371 2,735.371
Bayesian Inf. Crit. 2,754.774 2,754.774
Note: p<0.1; p<0.05; p<0.01
tableName <- 'opt09r2b'
# BUG FIX: the same model was passed twice — list(m1, m1) — which produced a
# table with two identical columns (visible in the rendered output). Report
# the model once.
modelTableStyle(linearRandomIntercept1, title, outputType, tableName)

[1] “opt09r2b”

Linear regressions
timeSpent
Random Intercept
(1) (2)
sahniK0 -0.928 -0.928
(1.732) (1.732)
p = 0.593 p = 0.593
sahniK2 5.084** 5.084**
(2.290) (2.290)
p = 0.027 p = 0.027
Constant 45.288*** 45.288***
(2.672) (2.672)
p = 0.000 p = 0.000
Observations 358 358
Log Likelihood -1,362.686 -1,362.686
Akaike Inf. Crit. 2,735.371 2,735.371
Bayesian Inf. Crit. 2,754.774 2,754.774
Note: p<0.1; p<0.05; p<0.01

2.5.3 Sahni-K, IC and Effort

2.5.3.0.1 Is effort (time spent) modulated by Sahni-K or IC?
# Is effort (time spent) modulated by Sahni-k, instance complexity (phaseT),
# or their interaction? Participant random intercept.
linearRandomIntercept <- lmer(timeSpent ~ sahniK + phaseT + sahniK:phaseT + (1|pID),
                              data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt11r'
modelTableStyle(linearRandomIntercept, title, outputType, tableName)

[1] “opt11r”

Linear regressions
timeSpent
Random Intercept
sahniK 1.489
(2.687)
p = 0.580
phaseT 9.535***
(1.366)
p = 0.000
sahniK:phaseT 0.502
(2.844)
p = 0.861
Constant 35.498***
(2.178)
p = 0.000
Observations 305
Log Likelihood -1,151.532
Akaike Inf. Crit. 2,315.065
Bayesian Inf. Crit. 2,337.386
Note: p<0.1; p<0.05; p<0.01

2.6 Other Performance Measures

2.6.1 Profit Reached (Value Performance)

2.6.1.1 Phase Transition and Value Performance

# Value performance: normalised profit (selected / optimal), restricted to
# trials whose selection satisfies the capacity constraint.
dataInput <- dataOptProp
dataInput <- dataInput[dataInput$capacitySel <= dataInput$capacity, ]
dataInput$profitNormalised <- dataInput$profitSel / dataInput$profitOpt

# Per-participant means in/out of the phase transition, then
# across-participant mean ± SE.
dataInput2 <- dataInput %>%
  group_by(phaseT, pID) %>%
  summarise(profitNormalised1 = mean(profitNormalised)) %>%
  ungroup() %>%
  group_by(phaseT) %>%
  summarise(profitNormalised = mean(profitNormalised1), se = se(profitNormalised1)) %>%
  ungroup()

plo <- ggplot(data = dataInput2,
              aes(y = profitNormalised, x = as.factor(as.logical(phaseT)),
                  label = round(profitNormalised, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = profitNormalised - se, ymax = profitNormalised + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Profit Normalised In and Out of phase Transition",
       x = "In Phase Transition?", y = "Value Performance") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")
  #coord_cartesian(ylim = c(0,1)) 

outputName <- "opt11Ag"
plotExport(plo, outputName)
## [1] "opt11Ag"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Effect of phase transition on value (economic) performance.
# (Removed a redundant self-assignment `dataInput = dataInput`.)
linearRandomIntercept <- lmer(profitNormalised ~ phaseT + (1|pID), data = dataInput)

title <- "Linear regressions"
tableName <- 'opt11Ar'
modelTableStyle(linearRandomIntercept, title, outputType, tableName)

[1] “opt11Ar”

Linear regressions
profitNormalised
Random Intercept
phaseT -0.015***
(0.004)
p = 0.00005
Constant 0.999***
(0.003)
p = 0.000
Observations 347
Log Likelihood 685.782
Akaike Inf. Crit. -1,363.564
Bayesian Inf. Crit. -1,348.166
Note: p<0.1; p<0.05; p<0.01

2.6.1.2 Time Spent and value performance

# Value performance vs effort on the time-analysis subset.
# NOTE(review): trials at exactly full capacity are excluded here (strict <),
# whereas the analogous filter for 'opt11Ar' uses <= — confirm whether the
# difference is intentional.
dataTemp <- dataOptTime
dataTemp <- dataTemp[dataTemp$capacitySel < dataTemp$capacity, ]
dataTemp$profitNormalised <- dataTemp$profitSel / dataTemp$profitOpt

plo <- ggplot(data = dataTemp, aes(x = timeSpent, y = profitNormalised)) +
  geom_point() +
  labs(title = "Effort and Economic Preformance",
       x = "Time spent on an instance", y = "Profit Reached") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm)

outputName <- "opt12g"
plotExport(plo, outputName)
## [1] "opt12g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Pearson correlation between effort and normalised profit.
cor.test(dataTemp$timeSpent, dataTemp$profitNormalised)
## 
##  Pearson's product-moment correlation
## 
## data:  dataTemp$timeSpent and dataTemp$profitNormalised
## t = -3.0554, df = 284, p-value = 0.002461
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.28841337 -0.06373278
## sample estimates:
##        cor 
## -0.1783975
# Linear mixed model: normalised profit on time spent, participant intercept.
linearMRandomIntercept <- lmer(profitNormalised ~ timeSpent + (1|pID), data = dataTemp)

title <- "Linear regressions"
tableName <- 'opt12r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt12r”

Linear regressions
profitNormalised
Random Intercept
timeSpent -0.0005***
(0.0001)
p = 0.001
Constant 1.009***
(0.006)
p = 0.000
Observations 286
Log Likelihood 596.883
Akaike Inf. Crit. -1,185.765
Bayesian Inf. Crit. -1,171.141
Note: p<0.1; p<0.05; p<0.01

2.6.2 Item Performance (Number of clicks away from the optimum)

2.6.2.1 Phase Transition and item Performance

dataInput <- dataOptProp

# Per-participant mean clicks-from-optimum in/out of the phase transition,
# then across-participant mean ± SE.
dataInput2 <- dataInput %>%
  group_by(phaseT, pID) %>%
  summarise(meanDistance1 = mean(itemDistanceFromOpt)) %>%
  ungroup() %>%
  group_by(phaseT) %>%
  summarise(meanDistance = mean(meanDistance1), se = se(meanDistance1)) %>%
  ungroup()

plo <- ggplot(data = dataInput2,
              aes(y = meanDistance, x = as.factor(as.logical(phaseT)),
                  label = round(meanDistance, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = meanDistance - se, ymax = meanDistance + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Item Performance In and Out of phase Transition",
       x = "In Phase Transition?", y = "Item Performance") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")

outputName <- "opt13g"
plotExport(plo, outputName)
## [1] "opt13g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Linear mixed model: clicks from optimum on phaseT. (The original comment
# about "effect on timeSpent" was a copy-paste leftover from an earlier chunk.)
dataInput <- dataOptProp

linearRandomIntercept <- lmer(itemDistanceFromOpt ~ phaseT + (1|pID), data = dataInput)

title <- "Linear regressions"
tableName <- 'opt13r'
modelTableStyle(linearRandomIntercept, title, outputType, tableName)

[1] “opt13r”

Linear regressions
itemDistanceFromOpt
Random Intercept
phaseT 0.410***
(0.092)
p = 0.00001
Constant 0.076
(0.076)
p = 0.319
Observations 358
Log Likelihood -441.574
Akaike Inf. Crit. 891.147
Bayesian Inf. Crit. 906.669
Note: p<0.1; p<0.05; p<0.01

2.6.2.2 Time Spent and Item Performance

# Scatter: effort (time spent) vs item performance (clicks from optimum).
# NOTE(review): "Preformance" typo in the displayed title is kept as-is here,
# since this rewrite is behaviour-preserving.
plo <- ggplot(data = dataOptTime, aes(x = timeSpent, y = itemDistanceFromOpt)) +
  geom_point() +
  labs(title = "Effort and Item Preformance",
       x = "Time spent on an instance",
       y = "Number of clicks away from the optimum") +
  theme(plot.title = element_text(hjust = 0.5)) +
  stat_smooth(method = glm)

outputName <- "opt14g"
plotExport(plo, outputName)
## [1] "opt14g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Pearson correlation between effort and item performance.
cor.test(dataOptTime$timeSpent, dataOptTime$itemDistanceFromOpt)
## 
##  Pearson's product-moment correlation
## 
## data:  dataOptTime$timeSpent and dataOptTime$itemDistanceFromOpt
## t = 4.3738, df = 303, p-value = 1.681e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1350811 0.3465155
## sample estimates:
##       cor 
## 0.2436916
# Linear mixed model: clicks from optimum on time spent, participant intercept.
linearMRandomIntercept <- lmer(itemDistanceFromOpt ~ timeSpent + (1|pID), data = dataOptTime)

title <- "Linear regressions"
tableName <- 'opt14r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)

[1] “opt14r”

Linear regressions
itemDistanceFromOpt
Random Intercept
timeSpent 0.017***
(0.004)
p = 0.00001
Constant -0.387**
(0.165)
p = 0.019
Observations 305
Log Likelihood -374.882
Akaike Inf. Crit. 757.765
Bayesian Inf. Crit. 772.646
Note: p<0.1; p<0.05; p<0.01

2.7 Time Spent and Accuracy (Computational Performance)

# Mean time spent per participant on correct vs incorrect trials, then
# across-participant means and standard errors.
dataInput <- dataOptTime %>%
  group_by(correct, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(correct) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()

plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(as.logical(correct)),
                  label = round(time, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Time Spent on Correct/Incorrect instances",
       x = "Reached Optimum", y = "Time Spent") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")

outputName <- "opt15g"
plotExport(plo, outputName)
## [1] "opt15g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Repeated-measures ANOVA of time spent on correct/incorrect outcome.
# (The original comment mentioned phase transition — copy-paste leftover.)
anovaModel <- aov(timeSpent ~ correct + Error(pID/correct), dataOptTime)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))
## [1] "P-value for one way ANOVA: 1.27e-05"
rm(dataInput)
# Logistic mixed model: probability of reaching the optimum vs time spent.
logitRandomIntercept <- glmer(correct ~ timeSpent + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptTime)

title <- "Logistic regressions"
tableName <- 'opt15r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “opt15r”

Logistic regressions
correct
Random Intercept
timeSpent -0.072***
(0.017)
p = 0.00003
Constant 4.975***
(0.865)
p = 0.000
Observations 305
Log Likelihood -124.754
Akaike Inf. Crit. 255.509
Bayesian Inf. Crit. 266.670
Note: p<0.1; p<0.05; p<0.01

3 Decision Vs. Optimisation

# Rename the per-participant accuracy columns of both tasks before merging.
names(summaryListDec[[2]])[names(summaryListDec[[2]]) == 'mean(correct)'] <- 'AccuracyDecision'
names(summaryListOpt[[2]])[names(summaryListOpt[[2]]) == 'mean(correct)'] <- 'AccuracyOptimisation'

varMerge <- merge(summaryListDec[[2]], summaryListOpt[[2]], by = "pID")
#varMerge = varMerge %>% filter(pID != "be16")#Outlier

# Pearson correlation between decision-task and optimisation-task accuracy.
cortest <- cor.test(varMerge$AccuracyDecision, varMerge$AccuracyOptimisation, method = "pearson")#spearman is non-parametric
print(cortest)
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyDecision and varMerge$AccuracyOptimisation
## t = 2.4059, df = 18, p-value = 0.02709
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06494383 0.76813291
## sample estimates:
##      cor 
## 0.493288
plo <- ggplot(varMerge, aes_string(x = "AccuracyDecision", y = "AccuracyOptimisation")) +
  geom_point(shape = 23, size = 3, fill = "red") +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed")+#GLM or LM(SAT...)
  labs(x = "Knapsack Accuracy: Decision", y = "Knapsack Accuracy: Optimisation") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5))

outputName <- "decOpt01g"
plotExport(plo, outputName)
## [1] "decOpt01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2

4 Appendix Test Corroborations

4.1 Controlling for effort, is Phase Transition still significant in the optimisation task?

# Does the phase-transition effect on accuracy survive when controlling for
# effort (time spent)? Logistic mixed model, participant random intercept.
logitRandomIntercept <- glmer(correct ~ phaseT + timeSpent + (1|pID),
                              family = binomial(link = "logit"),
                              data = dataOptTime)

title <- "Logistic regressions"
tableName <- 'app01r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)

[1] “app01r”

Logistic regressions
correct
Random Intercept
phaseT -2.359***
(0.743)
p = 0.002
timeSpent -0.055***
(0.017)
p = 0.002
Constant 6.137***
(1.062)
p = 0.000
Observations 305
Log Likelihood -115.648
Akaike Inf. Crit. 239.296
Bayesian Inf. Crit. 254.177
Note: p<0.1; p<0.05; p<0.01

4.2 Solvability and accuracy (Computational Performance) for KS Decision Task

# Per-participant accuracy by solvability (decision task), then
# across-participant mean ± SE.
dataInput <- dataDecProp %>%
  group_by(sol, pID) %>%
  summarise(accuracy1 = mean(correct)) %>%
  ungroup() %>%
  group_by(sol) %>%
  summarise(accuracy = mean(accuracy1), se = se(accuracy1)) %>%
  ungroup()

plo <- ggplot(data = dataInput,
              aes(y = accuracy, x = as.factor(as.logical(sol)),
                  label = round(accuracy, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = accuracy - se, ymax = accuracy + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Accuracy depending on solvability",
       x = "Does the instance have a solution?", y = "Accuracy") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")

outputName <- "app02g"
plotExport(plo, outputName)
## [1] "app02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Repeated-measures ANOVA: accuracy on solvability.
anovaModel <- aov(correct ~ sol + Error(pID/sol), dataDecProp)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))

[1] “P-value for one way ANOVA: 0.163”

rm(dataInput)
#logistic with random effects (intercept)
# NOTE(review): this section is about solvability and the ANOVA above uses
# `sol`, but the regression predictor here is `answer` (the participant's
# response) — confirm whether `answer` or `sol` is the intended regressor.
logitRandomIntercept = glmer(correct ~ answer + (1|pID), family=binomial(link="logit"), data=dataDecProp)

title="Logistic regressions"
tableName='app02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)

[1] "app02r"

Logistic regressions
correct
Random Intercept
answer 0.168
(0.144)
p = 0.245
Constant 1.565***
(0.131)
p = 0.000
Observations 1,427
Log Likelihood -639.750
Akaike Inf. Crit. 1,285.500
Bayesian Inf. Crit. 1,301.290
Note: p<0.1; p<0.05; p<0.01

4.3 Do people still modulate their effort when they don’t find the solution in the optimisation task?

# Linear mixed model (random intercept per participant): time spent as a
# function of phase transition, correctness, and their interaction.
linearRandomIntercept <- lmer(timeSpent ~ phaseT + correct + phaseT:correct + (1 | pID),
                              data = dataOptTime)

title <- "Linear regressions"
tableName <- "app03r"
modelTableStyle(linearRandomIntercept, title, outputType, tableName)

[1] "app03r"

Linear regressions
timeSpent
Random Intercept
phaseT 15.161**
(7.261)
p = 0.037
correct -0.489
(7.210)
p = 0.946
phaseT:correct -6.784
(7.374)
p = 0.358
Constant 36.227***
(7.379)
p = 0.00000
Observations 305
Log Likelihood -1,142.942
Akaike Inf. Crit. 2,297.885
Bayesian Inf. Crit. 2,320.207
Note: p<0.1; p<0.05; p<0.01

5 Arithmetic Test

The first 3 trials were regarded as practice trials.

# Import Math Data (Mental Arithmetic) and attach the participant log
dataMath <- importMathInfo(folderDataMath)
dataMath <- merge(dataMath, partLog, by = "pID")

# Keep only the experiment versions under analysis
dataMath <- dplyr::filter(dataMath, ExpVersion %in% versions)

# Generate correct/incorrect variable; missing answers count as incorrect
dataMath$correct <- (dataMath$answer == dataMath$solution)
dataMath$correct[is.na(dataMath$correct)] <- FALSE

# Drop trials 1-3 of the first block; they were considered practice trials
dataMath <- dataMath %>% filter(trial >= 4 | block != 1)

# Per-participant summary; rename the accuracy column for later merges
summaryMath <- dataMath %>%
  group_by(pID) %>%
  summarise(mean(correct), mean(timeSpent), mean(submitted))
names(summaryMath)[names(summaryMath) == "mean(correct)"] <- "AccuracyMath"
print(paste("Number of participants to analyse:", length(unique(dataMath$pID))))
## [1] "Number of participants to analyse: 20"
print(paste("Participants To be analysed are:", paste(unique(dataMath$pID), collapse = " ")))
## [1] "Participants To be analysed are: be14 be15 be16 be17 be18 be19 be20 be21 be22 be23 be24 be25 be26 be27 be28 be29 be30 be31 be32 be33"
print(paste("The overall MATH accuracy is", mean(dataMath$correct)))
## [1] "The overall MATH accuracy is 0.628333333333333"

5.1 Accuracy in Decision Knapsack problem and Mental Arithmetic

# Rename decision-task accuracy so both tasks share the column name
names(summaryListDec[[2]])[names(summaryListDec[[2]]) == "AccuracyDecision"] <- "AccuracyKnapsack"
mathMerge <- merge(summaryListDec[[2]], summaryMath, by = "pID")

#mathMerge = mathMerge %>% filter(pID != "be16")

# Scatter of knapsack (decision) accuracy vs. arithmetic accuracy with an
# OLS fit line
plo <- ggplot(mathMerge, aes(x = AccuracyKnapsack, y = AccuracyMath)) +
  geom_point() +
  geom_smooth(method = "lm") # GLM or LM(SAT...)


outputName <- "math01g"
plotExport(plo, outputName)
## [1] "math01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Pearson (parametric, linear) correlation; Spearman would be the
# non-parametric alternative
mathDec <- cor.test(mathMerge$AccuracyKnapsack, mathMerge$AccuracyMath, method = "pearson")

5.2 Accuracy in Optimisation Knapsack problem and Mental Arithmetic

# Accuracy of Knapsack (optimisation) vs. Math Task Accuracy
names(summaryListOpt[[2]])[names(summaryListOpt[[2]]) == "AccuracyOptimisation"] <- "AccuracyKnapsack"
mathMerge <- merge(summaryListOpt[[2]], summaryMath, by = "pID")

# Scatter with OLS fit line, mirroring the decision-task plot above
plo <- ggplot(mathMerge, aes(x = AccuracyKnapsack, y = AccuracyMath)) +
  geom_point() +
  geom_smooth(method = "lm")

outputName <- "math02g"
plotExport(plo, outputName)
## [1] "math02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen 
##                 2
# Pearson correlation; Spearman would be the non-parametric alternative
mathOpt <- cor.test(mathMerge$AccuracyKnapsack, mathMerge$AccuracyMath, method = "pearson")

6 CANTAB Task

# CANTAB data import; standardise the participant ID column name and attach
# the participant log
dataCantab <- importCantabInfo(folderDataCantab)
names(dataCantab)[names(dataCantab) == "subject.ID"] <- "pID"
dataCantab <- merge(dataCantab, partLog, by = "pID")
# Subsetting the relevant versions
dataCantab <- dplyr::filter(dataCantab, ExpVersion %in% versions)

# Subsetting only completed tests
dataCantabWork <- dataCantab[dataCantab$PAL.Recommended.Standard.Extended.Status == "COMPLETED", ]
# Variables of interest
vInterest <- c("pID", "PALFAMS28", "PALTEA28", "SSPFSL", "SWMS",
               "SWMBE4", "SWMBE6", "SWMBE8", "SWMBE12", "SWMBE468",
               "SWMTE4", "SWMTE6", "SWMTE8", "SWMTE12", "SWMTE468")
dataCantabWork <- dataCantabWork[, vInterest]

# Generates new weighted SWM measures: set sizes 4/6/8 weighted 4/3/2.
# The 12-box term (weight 1) is deliberately commented out.
SWMweights <- c(4, 3, 2, 1)
dataCantabWork$SWMTEweightedJP <- dataCantabWork$SWMTE4 * SWMweights[1] +
  dataCantabWork$SWMTE6 * SWMweights[2] +
  dataCantabWork$SWMTE8 * SWMweights[3] # + dataCantabWork$SWMTE12 * SWMweights[4]
dataCantabWork$SWMBEweightedJP <- dataCantabWork$SWMBE4 * SWMweights[1] +
  dataCantabWork$SWMBE6 * SWMweights[2] +
  dataCantabWork$SWMBE8 * SWMweights[3] # + dataCantabWork$SWMBE12 * SWMweights[4]

# Drop the raw SWM columns now folded into the weighted measures
dropC <- c("SWMBE4", "SWMBE6", "SWMBE8", "SWMBE12", "SWMBE468",
           "SWMTE4", "SWMTE6", "SWMTE8", "SWMTE12", "SWMTE468")
dataCantabWork <- dataCantabWork[, !(names(dataCantabWork) %in% dropC)]

# Actual variable list to be used for correlation analysis with knapsack
# performance measures
variables <- names(dataCantabWork)[-1]
# Correlate each variable with mean Decision KS accuracy
# (preallocated list; seq_along is safe when `variables` is empty)
decResults <- vector("list", length(variables))
for (i in seq_along(variables)) {
  summaryVar <- dataCantabWork[, c("pID", variables[i])]
  names(summaryVar)[1] <- "pID"
  decResults[[i]] <- indDiffAnalysis(summaryVar, dataAllDec)
}
## [1] "PALFAMS28"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.48043, df = 18, p-value = 0.6367
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5287144  0.3472938
## sample estimates:
##        cor 
## -0.1125195

## [1] "PALTEA28"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.67781, df = 18, p-value = 0.5065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3061315  0.5611094
## sample estimates:
##       cor 
## 0.1577611

## [1] "SSPFSL"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.0814, df = 18, p-value = 0.936
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4578171  0.4269626
## sample estimates:
##         cor 
## -0.01918253

## [1] "SWMS"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.5439, df = 18, p-value = 0.14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6813704  0.1184952
## sample estimates:
##        cor 
## -0.3419566

## [1] "SWMTEweightedJP"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.70894, df = 18, p-value = 0.4874
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5660508  0.2995542
## sample estimates:
##        cor 
## -0.1648142

## [1] "SWMBEweightedJP"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.82745, df = 18, p-value = 0.4188
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5844390  0.2743335
## sample estimates:
##        cor 
## -0.1914261

# Correlate each variable with mean Optimisation KS accuracy
# (preallocated list; seq_along is safe when `variables` is empty)
optResults <- vector("list", length(variables))
for (i in seq_along(variables)) {
  summaryVar <- dataCantabWork[, c("pID", variables[i])]
  names(summaryVar)[1] <- "pID"
  optResults[[i]] <- indDiffAnalysis(summaryVar, dataAllOpt)
}
## [1] "PALFAMS28"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.82498, df = 18, p-value = 0.4202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2748634  0.5840616
## sample estimates:
##       cor 
## 0.1908739

## [1] "PALTEA28"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.74953, df = 18, p-value = 0.4632
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5724234  0.2909486
## sample estimates:
##        cor 
## -0.1739711

## [1] "SSPFSL"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.91143, df = 18, p-value = 0.3741
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2563090  0.5970615
## sample estimates:
##       cor 
## 0.2100344

## [1] "SWMS"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.5912, df = 18, p-value = 0.129
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6869391  0.1081608
## sample estimates:
##        cor 
## -0.3511681

## [1] "SWMTEweightedJP"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.458, df = 18, p-value = 0.1621
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6710119  0.1372686
## sample estimates:
##        cor 
## -0.3249937

## [1] "SWMBEweightedJP"
## 
##  Pearson's product-moment correlation
## 
## data:  varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.4186, df = 18, p-value = 0.1731
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6661452  0.1458928
## sample estimates:
##        cor 
## -0.3171001

#' Standard error of a Pearson correlation estimate
#'
#' @param cortest An "htest"-like object as returned by cor.test(); only the
#'   $estimate (correlation r) and $parameter (degrees of freedom) fields are
#'   read.
#' @return The unnamed standard error, sqrt((1 - r^2) / df).
pearson_se <- function(cortest) {
  r <- cortest$estimate
  df <- cortest$parameter
  unname(sqrt((1 - r^2) / df))
}

# Summary table of correlations between cognitive-task measures and knapsack
# performance; first row is the arithmetic task, CANTAB variables follow.
cogTasksTable <- data.frame(Task = "Mental Arithmetic",
                            KS_Dec_corr = mathDec$estimate,
                            KS_Dec_Pvalue = mathDec$p.value,
                            KS_Dec_df = mathDec$parameter,
                            KS_Dec_se = pearson_se(mathDec),
                            KS_Opt_corr = mathOpt$estimate,
                            KS_Opt_Pvalue = mathOpt$p.value,
                            KS_Opt_df = mathOpt$parameter,
                            KS_Opt_se = pearson_se(mathOpt),
                            stringsAsFactors = FALSE, row.names = NULL)

# Append one row per CANTAB variable (rbind growth is fine at this size)
for (i in seq_along(variables)) {
  row <- list(variables[i],
              decResults[[i]]$corrTest$estimate,
              decResults[[i]]$corrTest$p.value,
              decResults[[i]]$corrTest$parameter,
              pearson_se(decResults[[i]]$corrTest),
              optResults[[i]]$corrTest$estimate,
              optResults[[i]]$corrTest$p.value,
              optResults[[i]]$corrTest$parameter,
              pearson_se(optResults[[i]]$corrTest))
  cogTasksTable <- rbind(cogTasksTable, row)
}
title <- "Cognitive and Knapsack Abilities Correlations"
outputName <- "cogr01"
exportTable(cogTasksTable, title, outputType, outputName)

[1] "cogr01"

Task KS_Dec_corr KS_Dec_Pvalue KS_Dec_df KS_Dec_se KS_Opt_corr KS_Opt_Pvalue KS_Opt_df KS_Opt_se
Mental Arithmetic 0.022 0.926 18 0.236 0.359 0.120 18 0.220
PALFAMS28 -0.113 0.637 18 0.234 0.191 0.420 18 0.231
PALTEA28 0.158 0.507 18 0.233 -0.174 0.463 18 0.232
SSPFSL -0.019 0.936 18 0.236 0.210 0.374 18 0.230
SWMS -0.342 0.140 18 0.221 -0.351 0.129 18 0.221
SWMTEweightedJP -0.165 0.487 18 0.232 -0.325 0.162 18 0.223
SWMBEweightedJP -0.191 0.419 18 0.231 -0.317 0.173 18 0.224

Refer to “tables.Rmd” for the code on how to combine the tables.